# Install the maps package only when it is missing, so re-running the
# script does not download it again every time.
if (!requireNamespace("maps", quietly = TRUE)) {
  install.packages("maps")
}
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/maps_3.4.1.tgz'
Content type 'application/x-gzip' length 3112293 bytes (3.0 MB)
==================================================
downloaded 3.0 MB
The downloaded binary packages are in
/var/folders/c4/xtbqs4rn3jg2c0ccfbf2bm980000gn/T//RtmpaLtc10/downloaded_packages
# Install the mapdata package only when it is missing, so re-running the
# script does not download it again every time.
if (!requireNamespace("mapdata", quietly = TRUE)) {
  install.packages("mapdata")
}
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/mapdata_2.3.1.tgz'
Content type 'application/x-gzip' length 25478897 bytes (24.3 MB)
==================================================
downloaded 24.3 MB
The downloaded binary packages are in
/var/folders/c4/xtbqs4rn3jg2c0ccfbf2bm980000gn/T//RtmpaLtc10/downloaded_packages
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.1 ✔ purrr 0.3.5
✔ tibble 3.1.8 ✔ dplyr 1.1.0
✔ tidyr 1.2.1 ✔ stringr 1.4.1
✔ readr 2.1.3 ✔ forcats 0.5.2── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(mapdata)
Loading required package: maps
Attaching package: ‘maps’
The following object is masked from ‘package:purrr’:
map
library(maps)
# Read the raw natural-disasters CSV into a data frame, then display it.
df_death <- read.csv(
  file = "/Users/farah/Documents/Term 2 Courses/Analytical Modeling /Group Project/natural-disasters.csv"
)
df_death
We can see that the above dataset contains data from as early as the 1900s.
Data Transformation:
# Keep only the rows from 2012 onwards (every earlier year is dropped).
# The column is referenced by its bare name so that filter() evaluates it
# inside the piped data frame instead of reaching back to the global df_death.
df_filtered <- df_death %>%
  filter(Year >= 2012)
df_filtered
# List the names of every column in the year-filtered data.
column_names <- colnames(df_filtered)
print(x = column_names)
[1] "Country.name" "Year"
[3] "Number.of.deaths.from.drought" "Number.of.people.injured.from.drought"
[5] "Number.of.people.affected.from.drought" "Number.of.people.left.homeless.from.drought"
[7] "Number.of.total.people.affected.by.drought" "Reconstruction.costs.from.drought"
[9] "Insured.damages.against.drought" "Total.economic.damages.from.drought"
[11] "Death.rates.from.drought" "Injury.rates.from.drought"
[13] "Number.of.people.affected.by.drought.per.100.000" "Homelessness.rate.from.drought"
[15] "Total.number.of.people.affected.by.drought.per.100.000" "Number.of.deaths.from.earthquakes"
[17] "Number.of.people.injured.from.earthquakes" "Number.of.people.affected.by.earthquakes"
[19] "Number.of.people.left.homeless.from.earthquakes" "Number.of.total.people.affected.by.earthquakes"
[21] "Reconstruction.costs.from.earthquakes" "Insured.damages.against.earthquakes"
[23] "Total.economic.damages.from.earthquakes" "Death.rates.from.earthquakes"
[25] "Injury.rates.from.earthquakes" "Number.of.people.affected.by.earthquakes.per.100.000"
[27] "Homelessness.rate.from.earthquakes" "Total.number.of.people.affected.by.earthquakes.per.100.000"
[29] "Number.of.deaths.from.disasters" "Number.of.people.injured.from.disasters"
[31] "Number.of.people.affected.by.disasters" "Number.of.people.left.homeless.from.disasters"
[33] "Number.of.total.people.affected.by.disasters" "Reconstruction.costs.from.disasters"
[35] "Insured.damages.against.disasters" "Total.economic.damages.from.disasters"
[37] "Death.rates.from.disasters" "Injury.rates.from.disasters"
[39] "Number.of.people.affected.by.disasters.per.100.000" "Homelessness.rate.from.disasters"
[41] "Total.number.of.people.affected.by.disasters.per.100.000" "Number.of.deaths.from.volcanic.activity"
[43] "Number.of.people.injured.from.volcanic.activity" "Number.of.people.affected.by.volcanic.activity"
[45] "Number.of.people.left.homeless.from.volcanic.activity" "Number.of.total.people.affected.by.volcanic.activity"
[47] "Reconstruction.costs.from.volcanic.activity" "Insured.damages.against.volcanic.activity"
[49] "Total.economic.damages.from.volcanic.activity" "Death.rates.from.volcanic.activity"
[51] "Injury.rates.from.volcanic.activity" "Number.of.people.affected.by.volcanic.activity.per.100.000"
[53] "Homelessness.rate.from.volcanic.activity" "Total.number.of.people.affected.by.volcanic.activity.per.100.000"
[55] "Number.of.deaths.from.floods" "Number.of.people.injured.from.floods"
[57] "Number.of.people.affected.by.floods" "Number.of.people.left.homeless.from.floods"
[59] "Number.of.total.people.affected.by.floods" "Reconstruction.costs.from.floods"
[61] "Insured.damages.against.floods" "Total.economic.damages.from.floods"
[63] "Death.rates.from.floods" "Injury.rates.from.floods"
[65] "Number.of.people.affected.by.floods.per.100.000" "Homelessness.rate.from.floods"
[67] "Total.number.of.people.affected.by.floods.per.100.000" "Number.of.deaths.from.mass.movements"
[69] "Number.of.people.injured.from.mass.movements" "Number.of.people.affected.by.mass.movements"
[71] "Number.of.people.left.homeless.from.mass.movements" "Number.of.total.people.affected.by.mass.movements"
[73] "Reconstruction.costs.from.mass.movements" "Insured.damages.against.mass.movements"
[75] "Total.economic.damages.from.mass.movements" "Death.rates.from.mass.movements"
[77] "Injury.rates.from.mass.movements" "Number.of.people.affected.by.mass.movements.per.100.000"
[79] "Homelessness.rate.from.mass.movements" "Total.number.of.people.affected.by.mass.movements.per.100.000"
[81] "Number.of.deaths.from.storms" "Number.of.people.injured.from.storms"
[83] "Number.of.people.affected.by.storms" "Number.of.people.left.homeless.from.storms"
[85] "Number.of.total.people.affected.by.storms" "Reconstruction.costs.from.storms"
[87] "Insured.damages.against.storms" "Total.economic.damages.from.storms"
[89] "Death.rates.from.storms" "Injury.rates.from.storms"
[91] "Number.of.people.affected.by.storms.per.100.000" "Homelessness.rate.from.storms"
[93] "Total.number.of.people.affected.by.storms.per.100.000" "Number.of.deaths.from.landslides"
[95] "Number.of.people.injured.from.landslides" "Number.of.people.affected.by.landslides"
[97] "Number.of.people.left.homeless.from.landslides" "Number.of.total.people.affected.by.landslides"
[99] "Reconstruction.costs.from.landslides" "Insured.damages.against.landslides"
[101] "Total.economic.damages.from.landslides" "Death.rates.from.landslides"
[103] "Injury.rates.from.landslides" "Number.of.people.affected.by.landslides.per.100.000"
[105] "Homelessness.rate.from.landslides" "Total.number.of.people.affected.by.landslides.per.100.000"
[107] "Number.of.deaths.from.fog" "Number.of.people.injured.from.fog"
[109] "Number.of.people.affected.by.fog" "Number.of.people.left.homeless.from.fog"
[111] "Number.of.total.people.affected.by.fog" "Reconstruction.costs.from.fog"
[113] "Insured.damages.against.fog" "Total.economic.damages.from.fog"
[115] "Death.rates.from.fog" "Injury.rates.from.fog"
[117] "Number.of.people.affected.by.fog.per.100.000" "Homelessness.rate.from.fog"
[119] "Total.number.of.people.affected.by.fog.per.100.000" "Number.of.deaths.from.wildfires"
[121] "Number.of.people.injured.from.wildfires" "Number.of.people.affected.by.wildfires"
[123] "Number.of.people.left.homeless.from.wildfires" "Number.of.total.people.affected.by.wildfires"
[125] "Reconstruction.costs.from.wildfires" "Insured.damages.against.wildfires"
[127] "Total.economic.damages.from.wildfires" "Death.rates.from.wildfires"
[129] "Injury.rates.from.wildfires" "Number.of.people.affected.by.wildfires.per.100.000"
[131] "Homelessness.rate.from.wildfires" "Total.number.of.people.affected.by.wildfires.per.100.000"
[133] "Number.of.deaths.from.extreme.temperatures" "Number.of.people.injured.from.extreme.temperatures"
[135] "Number.of.people.affected.by.extreme.temperatures" "Number.of.people.left.homeless.from.extreme.temperatures"
[137] "Number.of.total.people.affected.by.extreme.temperatures" "Reconstruction.costs.from.extreme.temperatures"
[139] "Insured.damages.against.extreme.temperatures" "Total.economic.damages.from.extreme.temperatures"
[141] "Death.rates.from.extreme.temperatures" "Injury.rates.from.extreme.temperatures"
[143] "Number.of.people.affected.by.extreme.temperatures.per.100.000" "Homelessness.rate.from.extreme.temperatures"
[145] "Total.number.of.people.affected.by.extreme.temperatures.per.100.000" "Number.of.deaths.from.glacial.lake.outbursts"
[147] "Number.of.people.injured.from.glacial.lake.outbursts" "Number.of.people.affected.by.glacial.lake.outbursts"
[149] "Number.of.people.left.homeless.from.glacial.lake.outbursts" "Number.of.total.people.affected.by.glacial.lake.outbursts"
[151] "Reconstruction.costs.from.glacial.lake.outbursts" "Insured.damages.against.glacial.lake.outbursts"
[153] "Total.economic.damages.from.glacial.lake.outbursts" "Death.rates.from.glacial.lake.outbursts"
[155] "Injury.rates.from.glacial.lake.outbursts" "Number.of.people.affected.by.glacial.lake.outbursts.per.100.000"
[157] "Homelessness.rate.from.glacial.lake.outbursts" "Total.number.of.people.affected.by.glacial.lake.outbursts.per.100.000"
[159] "Total.economic.damages.from.disasters.as.a.share.of.GDP" "Total.economic.damages.from.drought.as.a.share.of.GDP"
[161] "Total.economic.damages.from.earthquakes.as.a.share.of.GDP" "Total.economic.damages.from.extreme.temperatures.as.a.share.of.GDP"
[163] "Total.economic.damages.from.floods.as.a.share.of.GDP" "Total.economic.damages.from.landslides.as.a.share.of.GDP"
[165] "Total.economic.damages.from.mass.movements.as.a.share.of.GDP" "Total.economic.damages.from.storms.as.a.share.of.GDP"
[167] "Total.economic.damages.from.volcanic.activity.as.a.share.of.GDP" "Total.economic.damages.from.volcanic.activity.as.a.share.of.GDP.1"
[169] "total_damages_pct_gdp_glacial_lake"
Since there are a large number of columns, we keep only the identifier columns (country and year) and the columns whose names contain the word "death", or both "total" and "people"; all other columns are dropped.
# Keep the columns whose name mentions "death", or mentions both "total" and
# "people", together with the Year and Country identifier columns.
# The parentheses spell out that `&` binds tighter than `|`.
df_death <- select(
  df_filtered,
  contains("death") |
    (contains("total") & contains("people")) |
    contains("Year") |
    contains("Country")
)
# Move the identifier columns to the front; the rest keep their order.
df_death <- select(df_death, Country.name, Year, everything())
Updated columns list:
# List the names of the columns that survived the selection.
column_names <- colnames(df_death)
print(x = column_names)
[1] "Country.name" "Year"
[3] "Number.of.deaths.from.drought" "Death.rates.from.drought"
[5] "Number.of.deaths.from.earthquakes" "Death.rates.from.earthquakes"
[7] "Number.of.deaths.from.disasters" "Death.rates.from.disasters"
[9] "Number.of.deaths.from.volcanic.activity" "Death.rates.from.volcanic.activity"
[11] "Number.of.deaths.from.floods" "Death.rates.from.floods"
[13] "Number.of.deaths.from.mass.movements" "Death.rates.from.mass.movements"
[15] "Number.of.deaths.from.storms" "Death.rates.from.storms"
[17] "Number.of.deaths.from.landslides" "Death.rates.from.landslides"
[19] "Number.of.deaths.from.fog" "Death.rates.from.fog"
[21] "Number.of.deaths.from.wildfires" "Death.rates.from.wildfires"
[23] "Number.of.deaths.from.extreme.temperatures" "Death.rates.from.extreme.temperatures"
[25] "Number.of.deaths.from.glacial.lake.outbursts" "Death.rates.from.glacial.lake.outbursts"
[27] "Number.of.total.people.affected.by.drought" "Total.number.of.people.affected.by.drought.per.100.000"
[29] "Number.of.total.people.affected.by.earthquakes" "Total.number.of.people.affected.by.earthquakes.per.100.000"
[31] "Number.of.total.people.affected.by.disasters" "Total.number.of.people.affected.by.disasters.per.100.000"
[33] "Number.of.total.people.affected.by.volcanic.activity" "Total.number.of.people.affected.by.volcanic.activity.per.100.000"
[35] "Number.of.total.people.affected.by.floods" "Total.number.of.people.affected.by.floods.per.100.000"
[37] "Number.of.total.people.affected.by.mass.movements" "Total.number.of.people.affected.by.mass.movements.per.100.000"
[39] "Number.of.total.people.affected.by.storms" "Total.number.of.people.affected.by.storms.per.100.000"
[41] "Number.of.total.people.affected.by.landslides" "Total.number.of.people.affected.by.landslides.per.100.000"
[43] "Number.of.total.people.affected.by.fog" "Total.number.of.people.affected.by.fog.per.100.000"
[45] "Number.of.total.people.affected.by.wildfires" "Total.number.of.people.affected.by.wildfires.per.100.000"
[47] "Number.of.total.people.affected.by.extreme.temperatures" "Total.number.of.people.affected.by.extreme.temperatures.per.100.000"
[49] "Number.of.total.people.affected.by.glacial.lake.outbursts" "Total.number.of.people.affected.by.glacial.lake.outbursts.per.100.000"
# Check data types of columns in df_death.
# vapply() always returns a named character vector with one entry per column;
# sapply() would silently switch to a list if any column had several classes.
# Multiple classes, if present, are joined with "/".
vapply(df_death, function(col) paste(class(col), collapse = "/"), character(1))
Country.name Year
"character" "integer"
Number.of.deaths.from.drought Death.rates.from.drought
"integer" "numeric"
Number.of.deaths.from.earthquakes Death.rates.from.earthquakes
"integer" "numeric"
Number.of.deaths.from.disasters Death.rates.from.disasters
"integer" "numeric"
Number.of.deaths.from.volcanic.activity Death.rates.from.volcanic.activity
"integer" "numeric"
Number.of.deaths.from.floods Death.rates.from.floods
"integer" "numeric"
Number.of.deaths.from.mass.movements Death.rates.from.mass.movements
"integer" "numeric"
Number.of.deaths.from.storms Death.rates.from.storms
"integer" "numeric"
Number.of.deaths.from.landslides Death.rates.from.landslides
"integer" "numeric"
Number.of.deaths.from.fog Death.rates.from.fog
"integer" "numeric"
Number.of.deaths.from.wildfires Death.rates.from.wildfires
"integer" "numeric"
Number.of.deaths.from.extreme.temperatures Death.rates.from.extreme.temperatures
"integer" "numeric"
Number.of.deaths.from.glacial.lake.outbursts Death.rates.from.glacial.lake.outbursts
"integer" "numeric"
Number.of.total.people.affected.by.drought Total.number.of.people.affected.by.drought.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.earthquakes Total.number.of.people.affected.by.earthquakes.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.disasters Total.number.of.people.affected.by.disasters.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.volcanic.activity Total.number.of.people.affected.by.volcanic.activity.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.floods Total.number.of.people.affected.by.floods.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.mass.movements Total.number.of.people.affected.by.mass.movements.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.storms Total.number.of.people.affected.by.storms.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.landslides Total.number.of.people.affected.by.landslides.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.fog Total.number.of.people.affected.by.fog.per.100.000
"integer" "integer"
Number.of.total.people.affected.by.wildfires Total.number.of.people.affected.by.wildfires.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.extreme.temperatures Total.number.of.people.affected.by.extreme.temperatures.per.100.000
"integer" "numeric"
Number.of.total.people.affected.by.glacial.lake.outbursts Total.number.of.people.affected.by.glacial.lake.outbursts.per.100.000
"integer" "numeric"
Summary statistics for each variable:
# Calculate summary statistics (mean, max, min, sd, median) for every
# variable except the Year and Country.name identifiers.
# Output columns are named "<variable>_<statistic>".
# max()/min() return -Inf/Inf with a warning when a column has no non-missing
# values, so those two statistics yield NA for all-NA columns instead.
df_summary <- df_death %>%
  select(-c(Year, Country.name)) %>%
  summarise(across(
    everything(),
    list(
      mean = ~ mean(.x, na.rm = TRUE),
      max = ~ if (all(is.na(.x))) NA_real_ else max(.x, na.rm = TRUE),
      min = ~ if (all(is.na(.x))) NA_real_ else min(.x, na.rm = TRUE),
      sd = ~ sd(.x, na.rm = TRUE),
      median = ~ median(.x, na.rm = TRUE)
    )
  ))
Warning: There were 8 warnings in `summarise()`.
The first warning was:
ℹ In argument: `across(...)`.
Caused by warning in `max()`:
! no non-missing arguments to max; returning -Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
df_summary
# Reshape the wide summary: the part of each column name before the last
# underscore becomes the "variable" value, and the part after it (the
# statistic) becomes its own column.
df_summary_long <- pivot_longer(
  df_summary,
  cols = everything(),
  names_to = c("variable", ".value"),
  names_pattern = "(.*)_(.*)"
)
df_summary_long
Exporting the updated columns dataset to a csv file.
# Put the identifier columns first, then write the data (with row names)
# to a CSV file.
df_death <- select(df_death, Country.name, Year, everything())
write.csv(
  x = df_death,
  file = "/Users/farah/Downloads/natural-disasters_final.csv",
  row.names = TRUE
)
Check the number of unique countries in the dataset
# Number of distinct values in the Country.name column.
length(unique(df_death$Country.name))
[1] 211
# Distinct Country.name values, in order of first appearance.
unique(df_death[["Country.name"]])
[1] "Afghanistan" "Africa" "Albania" "Algeria"
[5] "Angola" "Anguilla" "Antigua and Barbuda" "Argentina"
[9] "Armenia" "Asia" "Australia" "Austria"
[13] "Azerbaijan" "Bahamas" "Bangladesh" "Barbados"
[17] "Belarus" "Belgium" "Belize" "Benin"
[21] "Bhutan" "Bolivia" "Bosnia and Herzegovina" "Botswana"
[25] "Brazil" "British Virgin Islands" "Bulgaria" "Burkina Faso"
[29] "Burundi" "Cambodia" "Cameroon" "Canada"
[33] "Cape Verde" "Central African Republic" "Chad" "Chile"
[37] "China" "Colombia" "Comoros" "Congo"
[41] "Costa Rica" "Cote d'Ivoire" "Croatia" "Cuba"
[45] "Cyprus" "Czechia" "Democratic Republic of Congo" "Denmark"
[49] "Djibouti" "Dominica" "Dominican Republic" "Ecuador"
[53] "Egypt" "El Salvador" "Estonia" "Eswatini"
[57] "Ethiopia" "Europe" "European Union (27)" "Fiji"
[61] "France" "French Guiana" "French Polynesia" "Gabon"
[65] "Gambia" "Georgia" "Germany" "Ghana"
[69] "Greece" "Guadeloupe" "Guatemala" "Guinea"
[73] "Guinea-Bissau" "Guyana" "Haiti" "High-income countries"
[77] "Honduras" "Hong Kong" "Hungary" "India"
[81] "Indonesia" "Iran" "Iraq" "Ireland"
[85] "Isle of Man" "Israel" "Italy" "Jamaica"
[89] "Japan" "Jordan" "Kazakhstan" "Kenya"
[93] "Kiribati" "Kuwait" "Kyrgyzstan" "Laos"
[97] "Latvia" "Lebanon" "Lesotho" "Liberia"
[101] "Libya" "Lithuania" "Low-income countries" "Lower-middle-income countries"
[105] "Luxembourg" "Macao" "Madagascar" "Malawi"
[109] "Malaysia" "Maldives" "Mali" "Marshall Islands"
[113] "Martinique" "Mauritania" "Mauritius" "Mexico"
[117] "Micronesia (country)" "Moldova" "Mongolia" "Montenegro"
[121] "Morocco" "Mozambique" "Myanmar" "Namibia"
[125] "Nepal" "Netherlands" "New Caledonia" "New Zealand"
[129] "Nicaragua" "Niger" "Nigeria" "North America"
[133] "North Korea" "North Macedonia" "Northern Mariana Islands" "Norway"
[137] "Oceania" "Oman" "Pakistan" "Palau"
[141] "Palestine" "Panama" "Papua New Guinea" "Paraguay"
[145] "Peru" "Philippines" "Poland" "Portugal"
[149] "Puerto Rico" "Qatar" "Reunion" "Romania"
[153] "Russia" "Rwanda" "Saint Barthelemy" "Saint Kitts and Nevis"
[157] "Saint Lucia" "Saint Martin (French part)" "Saint Vincent and the Grenadines" "Samoa"
[161] "Sao Tome and Principe" "Saudi Arabia" "Senegal" "Serbia"
[165] "Seychelles" "Sierra Leone" "Sint Maarten (Dutch part)" "Slovakia"
[169] "Slovenia" "Solomon Islands" "Somalia" "South Africa"
[173] "South America" "South Korea" "South Sudan" "Spain"
[177] "Sri Lanka" "Sudan" "Suriname" "Sweden"
[181] "Switzerland" "Syria" "Taiwan" "Tajikistan"
[185] "Tanzania" "Thailand" "Timor" "Togo"
[189] "Tonga" "Trinidad and Tobago" "Tunisia" "Turkey"
[193] "Turks and Caicos Islands" "Tuvalu" "Uganda" "Ukraine"
[197] "United Arab Emirates" "United Kingdom" "United States" "United States Virgin Islands"
[201] "Upper-middle-income countries" "Uruguay" "Uzbekistan" "Vanuatu"
[205] "Venezuela" "Vietnam" "Wallis and Futuna" "World"
[209] "Yemen" "Zambia" "Zimbabwe"
The list includes values that are not countries: income groups (low, lower-middle, upper-middle and high income), "World", and region names such as "Africa" and "Asia".
# Build df_world: only the rows whose Country.name is "World".
df_world <- filter(df_death, Country.name == "World")
df_world
Visualizing distributions
# Histogram of disaster deaths over all rows, using bins 500 wide.
ggplot(df_death, aes(x = Number.of.deaths.from.disasters)) +
  geom_histogram(binwidth = 500)
# Same variable with bins 5 wide, zoomed in to the 0-50 range on the x axis.
ggplot(df_death, aes(x = Number.of.deaths.from.disasters)) +
  geom_histogram(binwidth = 5) +
  coord_cartesian(xlim = c(0, 50))
We can see above that the count for the number of deaths due to disasters is the highest when number of deaths ranges between 0 to 10 for each country.
Observing the numbers for different bin widths of the number of deaths from disasters
# Count the rows that fall into each 500-wide bin of disaster deaths.
count(df_death, cut_width(Number.of.deaths.from.disasters, 500))
# Histogram with bins 1 wide, zoomed in to the 0-50 range on the x axis.
ggplot(df_death, aes(x = Number.of.deaths.from.disasters)) +
  geom_histogram(binwidth = 1) +
  coord_cartesian(xlim = c(0, 50))
Observing the outliers where the number of deaths from disasters is very high:
# Histogram with bins 50 wide; the y axis is zoomed to counts of 0-50 so that
# sparsely populated bins remain visible.
ggplot(df_death, aes(x = Number.of.deaths.from.disasters)) +
  geom_histogram(binwidth = 50) +
  coord_cartesian(ylim = c(0, 50))
Analyzing the rows where number.of.deaths.from.disasters is greater than 5000. We can see Nepal in 2015 had the highest number of deaths.
# Rows with more than 5000 disaster deaths, largest first.
# df_death still contains aggregate rows (World, continents, the EU and the
# income groups); they are excluded here so that the ranking compares
# individual countries only.
df_death %>%
  filter(
    !(Country.name %in% c("World", "Africa", "Asia", "European Union (27)",
                          "Europe", "North America", "South America",
                          "Oceania")),
    !grepl("income", Country.name),
    Number.of.deaths.from.disasters > 5000
  ) %>%
  arrange(desc(Number.of.deaths.from.disasters)) %>%
  select(Number.of.deaths.from.disasters, Country.name, Year)
#Milestone 2-a: The sequence of questions and findings.
Question 1: What is the total number of deaths from disasters per year worldwide?
library(ggplot2)
# Total disaster deaths per year, taken from the "World" rows.
total_deaths_by_year <- df_death %>%
  filter(Country.name == "World") %>%
  group_by(Year) %>%
  summarise(total_deaths = sum(Number.of.deaths.from.disasters))
# Bar chart with one bar per year. The values are sums, so the y axis is
# labelled as a total (not an average). geom_col() is the direct equivalent
# of geom_bar(stat = "identity").
ggplot(total_deaths_by_year, aes(x = Year, y = total_deaths)) +
  geom_col(fill = "darkblue") +
  labs(
    title = "Total Number of Deaths from Disasters per Year (World)",
    x = "Year",
    y = "Total Number of Deaths"
  ) +
  theme_bw() +
  scale_x_continuous(
    breaks = seq(
      min(total_deaths_by_year$Year),
      max(total_deaths_by_year$Year),
      by = 1
    )
  )
The highest numbers of deaths due to natural disasters in the past decade occurred in 2013 and 2015.
Question 2:What types of natural disasters had the highest number of deaths in the past decade?
# Line chart over the past decade for the different types of disasters.
library(ggplot2)
# Reshape the per-type death counts of the "World" rows to long format so a
# single geom_line() layer draws one line per disaster type. The overall
# "disasters" total is left out because it is not a disaster type.
# The legend entries are the original column names.
df_world %>%
  select(
    Year,
    starts_with("Number.of.deaths.from."),
    -Number.of.deaths.from.disasters
  ) %>%
  pivot_longer(cols = -Year, names_to = "series", values_to = "deaths") %>%
  ggplot(aes(x = Year, y = deaths, color = series)) +
  geom_line() +
  labs(color = "Series") +
  xlab("Year") +
  ylab("Number of deaths") +
  ggtitle("Line Chart for number of deaths for different types of disasters over the past decade") +
  # One axis break per year, derived from the data that is actually plotted.
  scale_x_continuous(breaks = seq(min(df_world$Year), max(df_world$Year), by = 1))
We can see from the above line chart that the global number of deaths in 2013 were the highest due to floods and in 2015 due to earthquakes.
Filtering out the non-country names/values:
# Keep only rows for individual countries: drop the "World" row, the
# continent/region aggregates and every income-group aggregate (names
# containing "income"). This single assignment replaces an earlier one that
# removed only "World" and was overwritten straight away.
df_death_country <- df_death %>%
  filter(!(Country.name %in% c("World", "Africa", "Asia", "European Union (27)", "Europe", "North America", "South America", "Oceania"))) %>%
  filter(!grepl("income", Country.name))
unique(df_death_country$Country.name)
[1] "Afghanistan" "Albania" "Algeria" "Angola"
[5] "Anguilla" "Antigua and Barbuda" "Argentina" "Armenia"
[9] "Australia" "Austria" "Azerbaijan" "Bahamas"
[13] "Bangladesh" "Barbados" "Belarus" "Belgium"
[17] "Belize" "Benin" "Bhutan" "Bolivia"
[21] "Bosnia and Herzegovina" "Botswana" "Brazil" "British Virgin Islands"
[25] "Bulgaria" "Burkina Faso" "Burundi" "Cambodia"
[29] "Cameroon" "Canada" "Cape Verde" "Central African Republic"
[33] "Chad" "Chile" "China" "Colombia"
[37] "Comoros" "Congo" "Costa Rica" "Cote d'Ivoire"
[41] "Croatia" "Cuba" "Cyprus" "Czechia"
[45] "Democratic Republic of Congo" "Denmark" "Djibouti" "Dominica"
[49] "Dominican Republic" "Ecuador" "Egypt" "El Salvador"
[53] "Estonia" "Eswatini" "Ethiopia" "Fiji"
[57] "France" "French Guiana" "French Polynesia" "Gabon"
[61] "Gambia" "Georgia" "Germany" "Ghana"
[65] "Greece" "Guadeloupe" "Guatemala" "Guinea"
[69] "Guinea-Bissau" "Guyana" "Haiti" "Honduras"
[73] "Hong Kong" "Hungary" "India" "Indonesia"
[77] "Iran" "Iraq" "Ireland" "Isle of Man"
[81] "Israel" "Italy" "Jamaica" "Japan"
[85] "Jordan" "Kazakhstan" "Kenya" "Kiribati"
[89] "Kuwait" "Kyrgyzstan" "Laos" "Latvia"
[93] "Lebanon" "Lesotho" "Liberia" "Libya"
[97] "Lithuania" "Luxembourg" "Macao" "Madagascar"
[101] "Malawi" "Malaysia" "Maldives" "Mali"
[105] "Marshall Islands" "Martinique" "Mauritania" "Mauritius"
[109] "Mexico" "Micronesia (country)" "Moldova" "Mongolia"
[113] "Montenegro" "Morocco" "Mozambique" "Myanmar"
[117] "Namibia" "Nepal" "Netherlands" "New Caledonia"
[121] "New Zealand" "Nicaragua" "Niger" "Nigeria"
[125] "North Korea" "North Macedonia" "Northern Mariana Islands" "Norway"
[129] "Oman" "Pakistan" "Palau" "Palestine"
[133] "Panama" "Papua New Guinea" "Paraguay" "Peru"
[137] "Philippines" "Poland" "Portugal" "Puerto Rico"
[141] "Qatar" "Reunion" "Romania" "Russia"
[145] "Rwanda" "Saint Barthelemy" "Saint Kitts and Nevis" "Saint Lucia"
[149] "Saint Martin (French part)" "Saint Vincent and the Grenadines" "Samoa" "Sao Tome and Principe"
[153] "Saudi Arabia" "Senegal" "Serbia" "Seychelles"
[157] "Sierra Leone" "Sint Maarten (Dutch part)" "Slovakia" "Slovenia"
[161] "Solomon Islands" "Somalia" "South Africa" "South Korea"
[165] "South Sudan" "Spain" "Sri Lanka" "Sudan"
[169] "Suriname" "Sweden" "Switzerland" "Syria"
[173] "Taiwan" "Tajikistan" "Tanzania" "Thailand"
[177] "Timor" "Togo" "Tonga" "Trinidad and Tobago"
[181] "Tunisia" "Turkey" "Turks and Caicos Islands" "Tuvalu"
[185] "Uganda" "Ukraine" "United Arab Emirates" "United Kingdom"
[189] "United States" "United States Virgin Islands" "Uruguay" "Uzbekistan"
[193] "Vanuatu" "Venezuela" "Vietnam" "Wallis and Futuna"
[197] "Yemen" "Zambia" "Zimbabwe"
Question 3: Which country was most affected by the floods in 2013?
# Dive deeper into 2013 to analyse which country was most affected by floods.
# Select the identifier columns plus every column containing "flood" in its name.
flood_cols <- c("Country.name", "Year", grep("flood", names(df_death_country), value = TRUE))
df_country_flood <- df_death_country[, flood_cols]
# Restrict to 2013 before aggregating (Year is an integer column, so it is
# compared with a number rather than a string), then rank countries by
# flood deaths. `.groups = "drop"` returns an ungrouped result.
df_death_flood_2013 <- df_country_flood %>%
  filter(Year == 2013) %>%
  group_by(Country.name, Year) %>%
  summarise(
    Number.of.deaths = sum(Number.of.deaths.from.floods),
    Number.of.people.affected = sum(Number.of.total.people.affected.by.floods),
    .groups = "drop"
  ) %>%
  arrange(desc(Number.of.deaths))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
# Display the 2013 flood table.
df_death_flood_2013
In 2013, the country that was most affected by floods was India with number of deaths = 6453
Question 4: Which country was most affected by the storms in 2013?
# Select the identifier columns plus every column containing "storm" in its name.
storm_cols <- c("Country.name", "Year", grep("storm", names(df_death_country), value = TRUE))
df_country_storm <- df_death_country[, storm_cols]
# Restrict to 2013 before aggregating (Year is an integer column, so it is
# compared with a number rather than a string), then rank countries by
# storm deaths. `.groups = "drop"` returns an ungrouped result.
df_death_storm_2013 <- df_country_storm %>%
  filter(Year == 2013) %>%
  group_by(Country.name, Year) %>%
  summarise(
    Number.of.deaths = sum(Number.of.deaths.from.storms),
    Number.of.people.affected = sum(Number.of.total.people.affected.by.storms),
    .groups = "drop"
  ) %>%
  arrange(desc(Number.of.deaths))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
# Country name in the first row of the 2013 storm table.
most_affected_country_storm_2013 <- df_death_storm_2013[["Country.name"]][1]
most_affected_country_storm_2013
[1] "Philippines"
Question 5: Which country was most affected by the earthquakes in 2015?
# Drill into 2015 to find the country that was hit hardest by earthquakes.
# Keep the two key columns plus every column whose name mentions "earthquake".
earthquake_cols <- c(
  "Country.name",
  "Year",
  names(df_death_country)[grepl("earthquake", names(df_death_country))]
)
df_country_earthquake <- df_death_country[, earthquake_cols]

# Restrict to 2015, total the earthquake deaths and people affected per
# country, and show the countries from most to fewest earthquake deaths.
df_country_earthquake %>%
  filter(Year == "2015") %>%
  group_by(Country.name, Year) %>%
  summarise(
    Number.of.deaths = sum(Number.of.deaths.from.earthquakes),
    Number.of.people.affected = sum(Number.of.total.people.affected.by.earthquakes)
  ) %>%
  arrange(desc(Number.of.deaths))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
In 2015, the country that was most affected by earthquakes was Nepal with number of deaths = 8969
Question 6: Which country has the highest proportion of deaths due to natural disasters as compared to the total number of global deaths due to disasters?
install.packages("gapminder")
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/gapminder_0.3.0.tgz'
Content type 'application/x-gzip' length 2031248 bytes (1.9 MB)
==================================================
downloaded 1.9 MB
The downloaded binary packages are in
/var/folders/c4/xtbqs4rn3jg2c0ccfbf2bm980000gn/T//RtmpaLtc10/downloaded_packages
library(gapminder)
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
library(ggplot2)
library(maps)
library(mapdata)
# Country outlines for the choropleth; the `region` column carries the names
# that geom_map() matches against `map_id`.
world <- map_data("world")

# Total disaster deaths per country over all years in df_death_country.
df1 <- df_death_country %>%
  group_by(Country.name) %>%
  summarize(
    Number.of.deaths.from.disasters.in.last.decade =
      sum(Number.of.deaths.from.disasters)
  )

# Global total, used as the denominator of every country's share.
Total.deaths.from.disasters <-
  sum(df1$Number.of.deaths.from.disasters.in.last.decade)

# Each country's share of the global total in percent (two decimals),
# largest share first.
df_death_prop <- df1 %>%
  mutate(
    death.proportion = round(
      (Number.of.deaths.from.disasters.in.last.decade /
        Total.deaths.from.disasters) * 100,
      2
    )
  ) %>%
  arrange(desc(death.proportion))

# The "world" map calls these two countries "USA" and "UK"; without the recode
# they would not be matched and would stay unfilled. The recode is applied to a
# temporary copy so df_death_prop keeps its original columns.
# NOTE(review): other dataset names may also differ from the map's region
# names -- check with setdiff(df_death_prop$Country.name, world$region).
ggplot() +
  geom_map(
    data = mutate(
      df_death_prop,
      map.region = recode(
        Country.name,
        "United States" = "USA",
        "United Kingdom" = "UK"
      )
    ),
    map = world,
    aes(map_id = map.region, fill = death.proportion),
    color = "black",
    # `size` for line widths is deprecated since ggplot2 3.4.0.
    linewidth = 0.5
  ) +
  scale_fill_gradient(low = "white", high = "blue", name = "Percentage of deaths") +
  # geom_map() does not set the axis limits itself, so take them from the map.
  expand_limits(x = world$long, y = world$lat) +
  theme_void()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
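From the above figure, India has the highest proportion of deaths due to natural disasters in the past decade (18.22%), followed by the Philippines (8.92%), Nepal (7.98%) and China (5.72%); of the large countries that stand out on the map, India and China are examined further below.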
Question 7: Which type of disaster is the most fatal in India?
# All yearly records for India.
df_death_india <- df_death_country %>%
  filter(Country.name == "India")
df_death_india

# Convert the data frame from wide to long format using tidyr: one row per
# year and disaster type. The aggregate "Number.of.deaths.from.disasters"
# column is not in the list, so only the individual disaster types are stacked.
df_death_india_long <- pivot_longer(
  df_death_india,
  cols = c(
    "Number.of.deaths.from.drought",
    "Number.of.deaths.from.earthquakes",
    "Number.of.deaths.from.volcanic.activity",
    "Number.of.deaths.from.floods",
    "Number.of.deaths.from.mass.movements",
    "Number.of.deaths.from.storms",
    "Number.of.deaths.from.landslides",
    "Number.of.deaths.from.fog",
    "Number.of.deaths.from.wildfires",
    "Number.of.deaths.from.extreme.temperatures",
    "Number.of.deaths.from.glacial.lake.outbursts"
  ),
  names_to = "variable",
  values_to = "value"
)

# Create the stacked bar chart: one bar per year, one segment per disaster type.
ggplot(df_death_india_long, aes(x = Year, y = value, fill = variable)) +
  geom_col() +
  # The x axis shows years and the y axis death counts, so label them as such.
  labs(x = "Year", y = "Number of deaths", fill = "Disaster type") +
  ggtitle("Stacked Bar Chart for Different types of disasters in India in last decade") +
  # One tick per year, derived from the plotted data itself instead of an
  # unrelated data frame.
  scale_x_continuous(
    breaks = seq(
      min(df_death_india_long$Year, na.rm = TRUE),
      max(df_death_india_long$Year, na.rm = TRUE),
      by = 1
    )
  )
The most fatal disaster in India has been floods in the last decade.
Question 8: What were the total number of deaths in India due to floods?
# Keep only the identifying columns and the long-format disaster/value pair.
df_death_india_long <- df_death_india_long %>%
  select(Country.name, Year, variable, value)

# Total flood deaths over all years in the India table.
df_death_india_long %>%
  filter(variable == "Number.of.deaths.from.floods") %>%
  group_by(variable) %>%
  summarise(total_deaths = sum(value))
There were 18397 deaths due to floods in India in the last decade.
Question 9: Which type of disaster is the most fatal in China?
# All yearly records for China.
df_death_china <- df_death_country %>%
  filter(Country.name == "China")
df_death_china

# Convert the data frame from wide to long format using tidyr: one row per
# year and disaster type. The aggregate "Number.of.deaths.from.disasters"
# column is not in the list, so only the individual disaster types are stacked.
df_death_china_long <- pivot_longer(
  df_death_china,
  cols = c(
    "Number.of.deaths.from.drought",
    "Number.of.deaths.from.earthquakes",
    "Number.of.deaths.from.volcanic.activity",
    "Number.of.deaths.from.floods",
    "Number.of.deaths.from.mass.movements",
    "Number.of.deaths.from.storms",
    "Number.of.deaths.from.landslides",
    "Number.of.deaths.from.fog",
    "Number.of.deaths.from.wildfires",
    "Number.of.deaths.from.extreme.temperatures",
    "Number.of.deaths.from.glacial.lake.outbursts"
  ),
  names_to = "variable",
  values_to = "value"
)

# Create the stacked bar chart: one bar per year, one segment per disaster type.
ggplot(df_death_china_long, aes(x = Year, y = value, fill = variable)) +
  geom_col() +
  # The x axis shows years and the y axis death counts, so label them as such.
  labs(x = "Year", y = "Number of deaths", fill = "Disaster type") +
  ggtitle("Stacked Bar Chart for Different types of disasters in China in last decade") +
  # One tick per year, derived from the plotted data itself instead of an
  # unrelated data frame.
  scale_x_continuous(
    breaks = seq(
      min(df_death_china_long$Year, na.rm = TRUE),
      max(df_death_china_long$Year, na.rm = TRUE),
      by = 1
    )
  )
The most fatal type of disaster in China in the last decade has also been floods.
Question 10: What were the total number of deaths in China due to floods?
install.packages("ggplot2")
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/ggplot2_3.4.1.tgz'
Content type 'application/x-gzip' length 4220621 bytes (4.0 MB)
==================================================
downloaded 4.0 MB
The downloaded binary packages are in
/var/folders/c4/xtbqs4rn3jg2c0ccfbf2bm980000gn/T//RtmpaLtc10/downloaded_packages
# Keep only the identifying columns and the long-format disaster/value pair.
df_death_china_long <- df_death_china_long %>%
  select(Country.name, Year, variable, value)

# Total flood deaths over all years in the China table.
df_death_china_long %>%
  filter(variable == "Number.of.deaths.from.floods") %>%
  group_by(variable) %>%
  summarise(total_deaths = sum(value))
There were 4580 deaths due to floods in China in the last decade.
Question 11: Which were the most fatal disasters each year in the last decade?
# Keep the key columns plus every "Number.of.deaths*" column of df_world.
cols <- c("Country.name", "Year", grep("Number.of.deaths", names(df_world), value = TRUE))
df_2 <- df_world[, cols]

# Long format: one row per year and disaster type. The aggregate
# "Number.of.deaths.from.disasters" column is not in the list and is dropped
# by the select() below.
df_2 <- pivot_longer(
  df_2,
  cols = c(
    "Number.of.deaths.from.drought",
    "Number.of.deaths.from.earthquakes",
    "Number.of.deaths.from.volcanic.activity",
    "Number.of.deaths.from.floods",
    "Number.of.deaths.from.mass.movements",
    "Number.of.deaths.from.storms",
    "Number.of.deaths.from.landslides",
    "Number.of.deaths.from.fog",
    "Number.of.deaths.from.wildfires",
    "Number.of.deaths.from.extreme.temperatures",
    "Number.of.deaths.from.glacial.lake.outbursts"
  ),
  names_to = "variable",
  values_to = "value"
)

# For every year keep the disaster type with the largest death count together
# with that count.
df2 <- df_2 %>%
  select(Country.name, Year, variable, value) %>%
  group_by(Year) %>%
  summarise(
    variable = variable[which.max(value)],
    max_value = max(value, na.rm = TRUE)
  )

# The plot uses exactly the same table; reuse it instead of recomputing the
# identical pipeline a second time.
df_plot <- df2

ggplot(df_plot, aes(x = Year, y = max_value, fill = variable)) +
  geom_col() +
  labs(x = "Year", y = "Maximum Value", fill = "Variable") +
  # One tick per year, derived from the plotted data itself instead of an
  # unrelated data frame.
  scale_x_continuous(
    breaks = seq(
      min(df_plot$Year, na.rm = TRUE),
      max(df_plot$Year, na.rm = TRUE),
      by = 1
    )
  )
NA
NA
The most fatal disasters in the last decade were earthquakes, extreme temperatures and floods.
Question 11: Which was the most fatal disaster in 2022?
# Show the per-year summary row for 2022 only.
filter(df2, Year == 2022)
The most fatal disaster in the year 2022 was due to floods.
Question 12: What are the top 10 countries in terms of death due to natural disasters from 2012 to 2016?
# Return the `top_n_rows` rows of `data` recorded for `target_year` that have
# the most disaster deaths, ordered from most to fewest deaths.
top_deaths_in_year <- function(data, target_year, top_n_rows = 5) {
  data %>%
    filter(Year == target_year) %>%
    arrange(desc(Number.of.deaths.from.disasters)) %>%
    slice(seq_len(top_n_rows))
}

# NOTE(review): the object names say "top_10", but five rows per year are kept
# (unchanged from before). Confirm whether ten were intended; if so, pass
# top_n_rows = 10 and adjust the plot title.
top_10_2012 <- top_deaths_in_year(df_death_country, 2012)
top_10_2013 <- top_deaths_in_year(df_death_country, 2013)
top_10_2014 <- top_deaths_in_year(df_death_country, 2014)
top_10_2015 <- top_deaths_in_year(df_death_country, 2015)
top_10_2016 <- top_deaths_in_year(df_death_country, 2016)

# Stack the yearly selections into one table for plotting.
top_10_countries <- rbind(
  top_10_2012,
  top_10_2013,
  top_10_2014,
  top_10_2015,
  top_10_2016
)

# One line per country across the years in which it was selected.
ggplot(
  top_10_countries,
  aes(
    x = Year,
    y = Number.of.deaths.from.disasters,
    group = Country.name,
    color = Country.name
  )
) +
  geom_line(linewidth = 1) +
  scale_color_discrete(name = "Country") +
  # The title states what is plotted: the five deadliest countries of each year.
  ggtitle("Top 5 Countries per Year in terms of deaths due to natural disasters (2012 to 2016)") +
  xlab("Year") +
  ylab("Total deaths") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 1))
We can see that the top countries in terms of deaths due to natural disasters are Nepal and India, closely followed by the Philippines.
Question 13: What are the total number of people affected due to natural disasters region wise in the last decade?
# Region-level rows only. df_death is the raw CSV, which reaches back to the
# 1900s, while the question is about the last decade, so apply the same
# Year >= 2012 cut-off that df_filtered uses.
# NOTE(review): this is a no-op if df_death was already restricted earlier.
df_region <- df_death %>%
  filter(
    Year >= 2012,
    Country.name %in% c("Africa", "Asia", "European Union (27)", "Europe", "North America", "South America", "Oceania")
  )

# Keep the key columns plus every "Number.of.total.people.affected*" column.
total_affected_cols <- c("Country.name", "Year", grep("Number.of.total.people.affected", names(df_region), value = TRUE))
df_total_affected_region <- df_region[, total_affected_cols]

# Strip the common prefix so only the disaster type remains as column name.
colnames(df_total_affected_region) <- gsub("Number\\.of\\.total\\.people\\.affected\\.by\\.", "", colnames(df_total_affected_region))

# Long format: one row per region, year and disaster type.
df_total_affected_region_pivot <- df_total_affected_region %>%
  pivot_longer(-c(Country.name, Year), names_to = "Variable", values_to = "Value")

# Total people affected per region and disaster type, ignoring missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df_total_affected_region_pivot <- df_total_affected_region_pivot %>%
  group_by(Country.name, Variable) %>%
  summarise(total.number.of.people.affected = sum(Value, na.rm = TRUE))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
# Heat map: one tile per Country.name / Variable pair, coloured by the total
# number of people affected.
ggplot(
  df_total_affected_region_pivot,
  aes(
    x = Country.name,
    y = Variable,
    fill = total.number.of.people.affected
  )
) +
  geom_tile() +
  scale_fill_viridis_c(option = "C", guide = "legend") +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
We can see that Asia has been the most affected by natural disasters mainly due to droughts, floods and storms.
Question 14: What is the death rate from natural disasters region wise in the last decade?
# Show the region-level rows that feed the death-rate heat map.
df_region

# Keep the key columns plus every "Death.rates.from*" column.
death_rates_cols <- c("Country.name", "Year", grep("Death.rates.from", names(df_region), value = TRUE))
df_death_region <- df_region[, death_rates_cols]

# Strip the common prefix so only the disaster type remains as column name.
colnames(df_death_region) <- gsub("Death\\.rates\\.from\\.", "", colnames(df_death_region))

# Long format: one row per Country.name, year and disaster type.
df_death_region_pivot <- df_death_region %>%
  pivot_longer(-c(Country.name, Year), names_to = "Variable", values_to = "Value")

# Add up the yearly death rates per Country.name and disaster type, ignoring
# missing values. TRUE is spelled out because T can be reassigned.
# NOTE(review): this sums rates across years; confirm a sum (rather than a
# mean) is the intended summary.
df_death_region_pivot <- df_death_region_pivot %>%
  group_by(Country.name, Variable) %>%
  summarise(death.rate = sum(Value, na.rm = TRUE))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
# Heat map: one tile per Country.name / Variable pair, coloured by death rate.
ggplot(
  df_death_region_pivot,
  aes(
    x = Country.name,
    y = Variable,
    fill = death.rate
  )
) +
  geom_tile() +
  scale_fill_viridis_c(option = "C", guide = "legend") +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
We see that the death rate is higher in Europe and the European Union due to extreme temperatures, followed by floods in other regions.
Question 15: What is the correlation between death rate and total number of people affected country wise?
# 2-D binned counts of log2(people affected) against log2(death rate), one
# observation per row of df_death_country.
ggplot(
  df_death_country,
  aes(
    x = log2(Number.of.total.people.affected.by.disasters),
    y = log2(Death.rates.from.disasters)
  )
) +
  geom_bin2d() +
  scale_fill_viridis_c(option = "H")
We can see that as the total number of people affected by disasters increases, the death rate also increases.
Question 16: How does the economic condition of a country impact the total number of people affected due to natural disasters?
# Rows whose Country.name contains "income".
# NOTE(review): df_death is not restricted by year in this block; confirm
# whether the full history or only the last decade is intended.
df_economic <- df_death %>%
  filter(grepl("income", Country.name))

# Keep the key columns plus every "Number.of.total.people.affected*" column.
total_affected_cols <- c("Country.name", "Year", grep("Number.of.total.people.affected", names(df_economic), value = TRUE))
df_total_affected_economic <- df_economic[, total_affected_cols]

# Strip the common prefix so only the disaster type remains as column name.
colnames(df_total_affected_economic) <- gsub("Number\\.of\\.total\\.people\\.affected\\.by\\.", "", colnames(df_total_affected_economic))

# Long format: one row per Country.name, year and disaster type.
df_total_affected_economic_pivot <- df_total_affected_economic %>%
  pivot_longer(-c(Country.name, Year), names_to = "Variable", values_to = "Value")

# Total people affected per Country.name and disaster type, ignoring missing
# values. TRUE is spelled out because T can be reassigned.
df_total_affected_economic_pivot <- df_total_affected_economic_pivot %>%
  group_by(Country.name, Variable) %>%
  summarise(total.number.of.people.affected = sum(Value, na.rm = TRUE))
`summarise()` has grouped output by 'Country.name'. You can override using the `.groups` argument.
# Density of log2(total people affected) for each Country.name value, after
# dropping rows with a missing Country.name.
ggplot(
  filter(df_total_affected_economic_pivot, !is.na(Country.name)),
  aes(
    x = log2(total.number.of.people.affected),
    color = Country.name,
    fill = Country.name
  )
) +
  geom_density(alpha = 0.5)
The low-income countries have a comparatively higher number of people affected due to natural disasters.
#Milestone 2-b: Present the finding in tables 1. Top 10 countries affected by floods in 2013 which contributed to the highest number of deaths from disasters in the last decade.
library(knitr)

# Order the 2013 flood table from most to fewest deaths.
df_death_flood_2013 <- arrange(df_death_flood_2013, desc(Number.of.deaths))

# Render the first ten rows as a captioned table.
df_death_flood_2013 %>%
  head(10) %>%
  kable(caption = "Top 10 countries affected by floods in 2013 which contributed to the highest number of deaths from disasters in the last decade.")
| Country.name | Year | Number.of.deaths | Number.of.people.affected |
|---|---|---|---|
| India | 2013 | 6453 | 3419473 |
| China | 2013 | 637 | 7684030 |
| Pakistan | 2013 | 268 | 1497782 |
| Cambodia | 2013 | 200 | 1500000 |
| Nepal | 2013 | 195 | 16823 |
| Vietnam | 2013 | 141 | 2161001 |
| Kenya | 2013 | 128 | 115800 |
| Zimbabwe | 2013 | 125 | 9700 |
| Brazil | 2013 | 119 | 247410 |
| Mozambique | 2013 | 119 | 240000 |
NA
# Show the first ten rows of the death-proportion table ...
head(df_death_prop, n = 10)

# ... and render the same ten rows as a captioned table.
df_death_prop %>%
  head(10) %>%
  kable(caption = "Top 10 countries which have the highest proportion of deaths from natural disasters in the past decade.")
| Country.name | Number.of.deaths.from.disasters.in.last.decade | death.proportion |
|---|---|---|
| India | 26020 | 18.22 |
| Philippines | 12736 | 8.92 |
| Nepal | 11391 | 7.98 |
| China | 8166 | 5.72 |
| Pakistan | 7621 | 5.34 |
| Indonesia | 7517 | 5.27 |
| France | 6770 | 4.74 |
| Afghanistan | 4258 | 2.98 |
| United Kingdom | 3521 | 2.47 |
| Haiti | 3454 | 2.42 |
#Milestone 2-c: Present the finding in figures
Figure 1: Top 3 countries per Year in terms of death due to Natural Disaster
# Bar chart of the three countries with the most disaster deaths in each year,
# coloured by their rank within that year.
top_3_countries_per_year <- df_death_country %>%
  group_by(Year) %>%
  arrange(Year, desc(Number.of.deaths.from.disasters)) %>%
  slice(1:3) %>%
  # Number the rows while still grouped by Year so rank is 1-3 within each
  # year; after ungroup() it would be a running row number over the whole
  # table, giving every bar its own colour.
  mutate(rank = row_number()) %>%
  ungroup() %>%
  ggplot(aes(x = factor(Year), y = Number.of.deaths.from.disasters, fill = factor(rank))) +
  geom_col(position = "dodge") +
  # No year filter is applied, so the title uses the same 2012-2022 range as
  # the final version of this figure.
  # NOTE(review): confirm df_death_country spans 2012-2022.
  labs(
    title = "Top 3 Countries per Year in Terms of Death due to Natural Disasters (2012-2022)",
    x = "Year",
    y = "Total Number of Deaths",
    fill = "Rank"
  ) +
  theme_minimal()
top_3_countries_per_year
library(dplyr)
library(ggplot2)
# The three countries with the most disaster deaths in each year; rank is the
# position (1-3) within the year.
top_3_countries_per_year <- df_death_country %>%
  group_by(Year) %>%
  arrange(Year, desc(Number.of.deaths.from.disasters)) %>%
  slice(1:3) %>%
  mutate(rank = row_number()) %>%
  ungroup()

# Build the palette from the countries that are actually plotted and name each
# colour after its country. Naming via levels() does not work here: levels()
# of a character column is NULL, so no colour would match a country and the
# legend labels would be suppressed.
plotted_countries <- sort(unique(as.character(top_3_countries_per_year$Country.name)))
n_colors <- length(plotted_countries)
# n_colors + 1 evenly spaced hues, dropping the last one because hue 375
# wraps around to hue 15.
color_palette <- setNames(
  hcl(h = seq(15, 375, length.out = n_colors + 1)[seq_len(n_colors)]),
  plotted_countries
)

# Dodged bars per year, filled by country.
top_3_countries_per_year <- ggplot(
  top_3_countries_per_year,
  aes(x = factor(Year), y = Number.of.deaths.from.disasters, fill = Country.name)
) +
  geom_col(position = "dodge") +
  # No year filter is applied, so the title uses the same 2012-2022 range as
  # the final version of this figure.
  # NOTE(review): confirm df_death_country spans 2012-2022.
  labs(
    title = "Top 3 Countries per Year in Terms of Death due to Natural Disasters (2012-2022)",
    x = "Year",
    y = "Total Number of Deaths",
    fill = "Country"
  ) +
  scale_fill_manual(values = color_palette) +
  theme_minimal()
top_3_countries_per_year
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# create a new dataframe that contains the top 3 countries per year
top_3_countries_per_year <- df_death_country %>%
  group_by(Year) %>%
  arrange(Year, desc(Number.of.deaths.from.disasters)) %>%
  slice(1:3) %>%
  ungroup()

# create a vector of unique country names (as character, so that indexing the
# named colour vector below is done by name)
unique_countries <- top_3_countries_per_year %>%
  distinct(Country.name) %>%
  pull(Country.name) %>%
  as.character()

# create a color palette with exactly one colour per country:
# "Set1" offers between 3 and 9 colours, so request a size inside that range,
# interpolate extra colours when more than 9 countries appear (otherwise the
# surplus countries would get no fill), and trim when fewer than 3 appear.
n_colors <- min(9, max(3, length(unique_countries)))
color_palette <- brewer.pal(n_colors, "Set1")
if (length(unique_countries) > length(color_palette)) {
  color_palette <- colorRampPalette(color_palette)(length(unique_countries))
}
color_palette <- color_palette[seq_along(unique_countries)]

# create a named vector of colors for each country
country_colors <- setNames(color_palette, unique_countries)

# add a new column to the dataframe with the colors for each country
top_3_countries_per_year <- top_3_countries_per_year %>%
  mutate(color = country_colors[as.character(Country.name)])

# create the bar graph
ggplot(
  top_3_countries_per_year,
  aes(x = factor(Year), y = Number.of.deaths.from.disasters, fill = Country.name)
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = country_colors) +
  labs(
    title = "Top 3 Countries per Year in Terms of Death due to Natural Disasters (2012-2022)",
    x = "Year",
    y = "Total Number of Deaths",
    fill = "Country"
  ) +
  theme_minimal()
Figure 2: Number of deaths per type of disaster between 2012 and 2022
Figure 3:Total number of people affected by natural disasters per region between 2012 and 2022.